

# Start from a clean slate: drop every object in the current workspace
# (including dot-prefixed ones) and emit a form-feed, which clears the console
# in front ends that honor it (e.g. RStudio).
# NOTE(review): this wipes the workspace of whoever sources the script; run it
# in a fresh R session if that side effect is not wanted.
rm(list = ls(all.names = TRUE))
cat("\f")

library(rootSolve)
library(nloptr)
library(entropy)
library(LaplacesDemon) # KLD function 

source("generate_data.R")
source("process_data.R")
source("compute_nde.R") # { G-formula={Y,M}, IPW={M,A}, Mixed={A,Y}, AIPW={A,M,Y} }
source("compute_mse.R") # includes {estimate_y} for reparam estimation of Y

# Fix the RNG state so that repeated runs of the script start identically.
set.seed(0)

# ################################################
# Data
# ################################################

# Simulation size: rows generated per trial and number of trials.
n <- 5000
trials <- 100

# Per-trial accumulators. They all start out empty (NULL) and receive one
# value per trial inside the simulation loop.

# NDE values: truth, unconstrained fit, and the four constrained fits.
nde_0.vec     <- NULL
nde_0hat.vec  <- NULL
nde_gform.vec <- NULL
nde_aipw.vec  <- NULL
nde_mixed.vec <- NULL
nde_ipw.vec   <- NULL

# Test-set mean squared errors.
mse_0.vec     <- NULL
mse_gform.vec <- NULL
mse_aipw.vec  <- NULL
mse_mixed.vec <- NULL
mse_ipw.vec   <- NULL

# `mle` values reported by the data generator / the fitted models.
mle_0.vec     <- NULL
mle_gform.vec <- NULL
mle_aipw.vec  <- NULL
mle_mixed.vec <- NULL
mle_ipw.vec   <- NULL

# Scaled differences between the reference `mle` and each fitted `mle`.
kl_0.vec     <- NULL
kl_gform.vec <- NULL
kl_aipw.vec  <- NULL
kl_mixed.vec <- NULL
kl_ipw.vec   <- NULL

# Run `trials` independent replications. Each replication generates a fresh
# data set, fits the unconstrained MLE plus four constrained MLEs (one per NDE
# estimator: G-formula, AIPW, Mixed, IPW), and stores for every fit its NDE,
# its test-set MSE, its `mle` value and the scaled gap (1/n) * (mle_0 - mle).
for (t in seq_len(trials)) {

  cat("\n \n trial = ", t, "\n \n")

  # generate data
  data_obj <- generate_data(n)
  dat <- data_obj$data
  beta_y0 <- data_obj$beta_y
  beta_m0 <- data_obj$beta_m
  beta_a0 <- data_obj$beta_a

  # Data-generating coefficients, bundled in the shape compute_effect() takes.
  beta_0 <- list(beta_y = beta_y0,
                 beta_m = beta_m0,
                 beta_a = beta_a0)

  # Calculate p(C, A, M, Y)
  # NOTE(review): prob_truth is not referenced again in this file.
  prob_truth <- data_obj$prob
  mle_0 <- data_obj$mle

  # NOTE(review): attach() is kept because the sourced scripts may refer to the
  # columns of `dat` by bare name -- confirm, and drop it if they do not. It is
  # paired with the detach() at the end of the iteration so that copies of the
  # data do not pile up on the search path across trials.
  attach(dat, warn.conflicts = FALSE)

  # Generate train and test data: first 80% of the rows train, the rest test.
  n1 <- floor(n * 0.8)
  dat_train <- dat[1:n1, ]
  dat_test <- dat[(n1 + 1):n, ]
  Y_test <- dat_test$Y
  dat_test$Y <- NA  # hide the held-out outcome from the fitting code

  # Formulas
  fmla_y <- as.formula(Y ~ C + A + AC + M + CM + AM + ACM)
  fmla_m <- as.formula(M ~ C + A + AC)
  fmla_a <- as.formula(A ~ C)
  fmla <- list(fmla_y = fmla_y, fmla_m = fmla_m, fmla_a = fmla_a)

  # NOTE(review): Y_samp and results are not used again in this file. They are
  # kept because sample_Y() presumably draws random numbers (removing the call
  # would shift the RNG stream) and the sourced scripts may read these globals
  # -- TODO confirm.
  Y_samp <- sample_Y(dat_train, beta_y0, J = 1000)
  results <- matrix(0, nrow = 5, ncol = 5)
  colnames(results) <- c("estimator", "mle", "nde", "mse", "kl")

  # w_truth = dnorm(dat_train$C, 0, 1)


  # ################################################
  # (0) Unconstrained MLE
  # ################################################

  # NOTE(review): re-sourced on every trial; could be hoisted above the loop if
  # the script only defines functions -- verify before moving.
  source("0_unconstrainedMLE.R")

  # Initialization
  opt_0 <- list(reparam = FALSE,
                estimator = "G-formula")
  px_0 <- rep(1 / n1, n1)  # uniform weight on every training row

  # Fitting
  model_0hat <- optimize_unconst(dat, dat_train, dat_test, fmla)

  # Evaluations
  beta_0hat <- list(beta_a = model_0hat$beta_a,
                    beta_m = model_0hat$beta_m,
                    beta_y = model_0hat$beta_y)
  mle_0hat <- model_0hat$mle
  nde_0hat <- compute_effect(dat_train, beta_0hat, px_0, opt_0)
  mse_0 <- mean((Y_test - model_0hat$Yhat_test)^2)
  kl_0 <- (1 / n) * (mle_0 - mle_0hat)

  # True effect: same computation, evaluated at the generating coefficients.
  nde_0 <- compute_effect(dat_train, beta_0, px_0, opt_0)

  # ################################################
  # (1) Constrained MLE
  # ################################################

  source("1_constrainedMLE.R")

  # Initialization
  opt_1 <- list(reparam = FALSE,
                tau_l = -0.05,
                tau_u = 0.05)
                # tau_l = -0.00001,
                # tau_u = 0.00001)

  px_1 <- rep(1 / n1, n1)
  beta_start_1 <- NULL
  func <- compute_effect

  # NOTE(review): `estimators` is not referenced again in this file.
  estimators <- c("IPW", "Mixed", "AIPW", "G-formula")


  # ++++++++++++++++++++++++++++++++
  # G-formula
  # ++++++++++++++++++++++++++++++++

  opt_1$estimator <- "G-formula"

  # Fitting
  model_gform <- optimize_nloptr(dat, dat_train, func, beta_start_1, px_1,
                                 fmla, opt_1)

  # Evaluations
  beta_gform <- list(beta_a = model_gform$beta_a,
                     beta_m = model_gform$beta_m,
                     beta_y = model_gform$beta_y)
  mle_gform <- model_gform$mle
  nde_gform <- compute_effect(dat_train, beta_gform, px_1, opt_1)
  mse_gform <- compute_mse(dat_test, Y_test, beta_gform, opt_1)
  kl_gform <- (1 / n) * (mle_0 - mle_gform)


  # ++++++++++++++++++++++++++++++++
  # AIPW
  # ++++++++++++++++++++++++++++++++

  opt_1$estimator <- "AIPW"

  # Fitting
  model_aipw <- optimize_nloptr(dat, dat_train, func, beta_start_1, px_1,
                                fmla, opt_1)

  # Evaluations
  beta_aipw <- list(beta_a = model_aipw$beta_a,
                    beta_m = model_aipw$beta_m,
                    beta_y = model_aipw$beta_y)
  mle_aipw <- model_aipw$mle
  nde_aipw <- compute_effect(dat_train, beta_aipw, px_1, opt_1)
  mse_aipw <- compute_mse(dat_test, Y_test, beta_aipw, opt_1)
  kl_aipw <- (1 / n) * (mle_0 - mle_aipw)


  # ++++++++++++++++++++++++++++++++
  # MIXED
  # ++++++++++++++++++++++++++++++++

  opt_1$estimator <- "Mixed"

  # Fitting
  model_mixed <- optimize_nloptr(dat, dat_train, func, beta_start_1, px_1,
                                 fmla, opt_1)

  # Evaluations
  beta_mixed <- list(beta_a = model_mixed$beta_a,
                     beta_m = model_mixed$beta_m,
                     beta_y = model_mixed$beta_y)
  mle_mixed <- model_mixed$mle
  nde_mixed <- compute_effect(dat_train, beta_mixed, px_1, opt_1)
  mse_mixed <- compute_mse(dat_test, Y_test, beta_mixed, opt_1)
  kl_mixed <- (1 / n) * (mle_0 - mle_mixed)

  # ++++++++++++++++++++++++++++++++
  # IPW
  # ++++++++++++++++++++++++++++++++

  opt_1$estimator <- "IPW"

  # Fitting
  model_ipw <- optimize_nloptr(dat, dat_train, func, beta_start_1, px_1,
                               fmla, opt_1)

  # Evaluations
  beta_ipw <- list(beta_a = model_ipw$beta_a,
                   beta_m = model_ipw$beta_m,
                   beta_y = model_ipw$beta_y)
  mle_ipw <- model_ipw$mle
  nde_ipw <- compute_effect(dat_train, beta_ipw, px_1, opt_1)
  mse_ipw <- compute_mse(dat_test, Y_test, beta_ipw, opt_1)
  kl_ipw <- (1 / n) * (mle_0 - mle_ipw)


  # Record this trial's results in slot t of each accumulator. Indexed
  # assignment extends an empty (NULL) accumulator as needed, so no vector is
  # rebuilt with c() on every pass.
  nde_0.vec[t] <- nde_0
  nde_0hat.vec[t] <- nde_0hat
  nde_gform.vec[t] <- nde_gform
  nde_aipw.vec[t] <- nde_aipw
  nde_mixed.vec[t] <- nde_mixed
  nde_ipw.vec[t] <- nde_ipw

  mse_0.vec[t] <- mse_0
  mse_gform.vec[t] <- mse_gform
  mse_aipw.vec[t] <- mse_aipw
  mse_mixed.vec[t] <- mse_mixed
  mse_ipw.vec[t] <- mse_ipw

  # Note: the "0" slot stores the data generator's `mle`, not mle_0hat.
  mle_0.vec[t] <- mle_0
  mle_gform.vec[t] <- mle_gform
  mle_aipw.vec[t] <- mle_aipw
  mle_mixed.vec[t] <- mle_mixed
  mle_ipw.vec[t] <- mle_ipw

  kl_0.vec[t] <- kl_0
  kl_gform.vec[t] <- kl_gform
  kl_aipw.vec[t] <- kl_aipw
  kl_mixed.vec[t] <- kl_mixed
  kl_ipw.vec[t] <- kl_ipw

  # Undo this trial's attach() so the search path does not grow by one copy of
  # the data per iteration.
  detach(dat)

}

# Collect the per-trial accumulators into one table (one row per trial), round
# every column to 6 decimals and save it as CSV in the working directory.
# NOTE(review): column `nde_0` holds the unconstrained-fit NDE (nde_0hat.vec);
# the true-effect series nde_0.vec is accumulated in the loop but never
# written out, while `mle_0` holds the data generator's `mle` -- confirm this
# naming is intended.
results_sim1 <- data.frame(nde_0 = nde_0hat.vec,
                           nde_gform = nde_gform.vec,
                           nde_aipw = nde_aipw.vec,
                           nde_mixed = nde_mixed.vec,
                           nde_ipw = nde_ipw.vec,
                           mse_0 = mse_0.vec,
                           mse_gform = mse_gform.vec,
                           mse_aipw = mse_aipw.vec,
                           mse_mixed = mse_mixed.vec,
                           mse_ipw = mse_ipw.vec,
                           mle_0 = mle_0.vec,
                           mle_gform = mle_gform.vec,
                           mle_aipw = mle_aipw.vec,
                           mle_mixed = mle_mixed.vec,
                           mle_ipw = mle_ipw.vec,
                           kl_0 = kl_0.vec,
                           kl_gform = kl_gform.vec,
                           kl_aipw = kl_aipw.vec,
                           kl_mixed = kl_mixed.vec,
                           kl_ipw = kl_ipw.vec)
results_sim1 <- round(results_sim1, 6)

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(results_sim1, "results_sim1.csv", row.names = FALSE)

# res = read.csv("results_sim1.csv")

